/****************************************************************************************************************************                                                                                                                                     
 	PROGRAMMED BY:	Mette Foged 
	PROJECT OWNER:	Mette Foged  
	RESEARCHERS:	EARN 
 	DESCRIPTION: 	1)	Load IDAN type=H (information about main job in November), and AKM variables with suffix 13.
					2)	Occupations: Create crosswalk from disco-08 to disco-88 and implement it - details in section 2 below. 
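						Occupation variables in output data:
						- 	disco: the actual 6-digit occupation code (disco-88 coding before 2010, disco-08 coding from 2010 onwards)
						- 	disco88: 6-digit disco-88 code (from 2010 onwards obtained via the crosswalk)
						- 	isco88: 4-digit isco-88 code (from 2010 onwards obtained via the crosswalk)
						- 	isco: first 4 digits of disco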
						Note: isco = International Standard Classification of Occupations (see ILO and DST for details)			
*****************************************************************************************************************************/


dm 'cle log';
dm 'cle out';

options obs=max;

libname grund 'H:\Rawdata\707455\Grunddata';
libname raw 'H:\Rawdata\707455\Views';
libname temp 'L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Temp';
libname input 'L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Input';
libname out 'L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Input';

 
* 	Formats from Statistics Denmark;
libname fmt  '\\srvfsenas3\formater\SAS formater i Danmarks Statistik\FORMATKATALOG';
options fmtsearch=(fmt.times_personstatistik fmt.times_bbr fmt.times_erhvervsstatistik fmt.brancher fmt.uddannelser fmt.geokoder fmt.disco fmt.statistikbank);

%let first=1997;
%let last=2018;


*****************************************************************************************************************************
*****************************************************************************************************************************
	1)	Load IDAN and AKM
*****************************************************************************************************************************
*****************************************************************************************************************************;


%macro load(data); 	
	data temp.&data. (drop = disco: _: arb_hoved_bra_DB07 timelon smal_timeloen type type_2008
							rename=(&data.SourceYear=year));
						*pnr type: _lbnr timelon tlonkval smal_timeloen persbrc persbrc07 arb_hoved_bra_DB07 &data.SourceYear ;
	set raw.&data.v (keep= _ALL_ rename=(pnr=_pnr lbnr=_lbnr persbrc=_persbrc persbrc07=_percbrc07 arb_hoved_bra_DB07=_arb_hoved_bra_DB07));
	
	where &data.SourceYear>=&first. & &data.SourceYear <= &last. & _pnr ne "";
	pnr=_pnr*1;
	lbnr=_lbnr*1;
	persbrc=_persbrc*1;
	persbrc07=_percbrc07*1;
	arb_hoved_bra_DB07=_arb_hoved_bra_DB07*1;

	if &data.SourceYear>=2008 then do;			
									if type_2008="H"; 
									type=type_2008;
									wage_rate=smal_timeloen;  									
									persbrc=arb_hoved_bra_DB07;											
									end;
	else  if &data.SourceYear<=2007 then do; 	
									if type="H";
									wage_rate=timelon;
								 	end;
		label 	wage_rate	=	"Hourly wage rate (main job in Nov.)"
				persbrc		=	"Personal industry with breaks db93/db03/db07 (main job in Nov.)"
				pnr			=	"Person id"
				lbnr 		= 	"Establishment id (main job in Nov.)";
run;
%mend;
%load(idanupd);


%macro load(data);
data temp.&data. (drop= _: disco_alle_indk_13 disco08: discoalle: discoloen: discosel:
						rename=(&data.01SourceYear=year));
	set raw.&data.01v (keep=_ALL_ 
					 rename=(pnr=_pnr));
	
	where &data.01SourceYear>=&first. and &data.01SourceYear <= &last. & _pnr ne "";
	pnr=_pnr*1;
											*Set Unknown ('Ikke oplyst') to missing; 
	if &data.01SourceYear lt 2010 then do; 	if disco_alle_indk_13 in ('000000','999900','999999') then disco_alle_indk_13="";
											disco = disco_alle_indk_13+0; 	end; 
								else do;	if disco08_alle_indk_13 in ('999999') then disco08_alle_indk_13="";
											disco = disco08_alle_indk_13+0; end;
run;
%mend;
%load(akmupd);




*****************************************************************************************************************************
*****************************************************************************************************************************
 	2)	Databreak in the classification of occupations (from disco-88 to disco-08) between 2009 and 2010:
	Create crosswalk:
	a) 	Select all individuals with an establishment identifier (lbnr) and an occupation code in 2009 and in 2010, who stay in 
		the same establishment.
	b)	For each disco-08 choose the most frequent disco-88 among the individuals defined in 2a)
	Implement:
	c)	Apply the crosswalk from 2010 onwards to those who change establishment between 2009 and 2010. 	
		For those who do not change establishment, assume that occupation has not changed either. Otherwise all occupations from
		2010 onwards would simply be mapped into the most frequent disco-88, eliminating the less frequent occupations and 
		creating an artificially high job turnover between 2009 and 2010. 
*****************************************************************************************************************************
*****************************************************************************************************************************; 


*	a)	;
data combined2009	(keep = pnr lbnr2009 disco2009 disco2009_4d);
	merge 	temp.idanupd (in=a where=(year=2009 & lbnr ne 0 & lbnr ne .)) 
			temp.akmupd (in=b where=(year=2009 & disco ne .));
	by pnr;
	if a;
	if b;
	lbnr2009=lbnr;
	disco2009=disco;
	disco2009_4d=int(disco/100);
run;
data combined2010	(keep = pnr lbnr disco disco_4d);
	merge 	temp.idanupd (in=a where=(year=2010 & lbnr ne 0 & lbnr ne .)) 
			temp.akmupd (in=b where=(year=2010 & disco ne .)); 
	by pnr;
	if a;
	if b;
	disco_4d=int(disco/100);
run;
data crosswalk_population;
	merge 	combined2009 (in=a)
			combined2010 (in=b);
	by pnr;
	if a;
	if b;
  	if lbnr2009=lbnr;
  	antal=1;
run;
proc sort data=crosswalk_population out=crosswalk_population_4d (drop = disco disco2009); by disco_4d disco2009_4d; run;
proc sort data=crosswalk_population (drop = disco_4d disco2009_4d); by disco disco2009; run;


*	b)	;
proc means data=crosswalk_population n noprint;
 	class disco disco2009;
 	var antal;
 	output out=crosswalk n=antal;
run;


proc sort data=crosswalk (where=(disco ne . and disco2009 ne .)); *proc means creates missings;
 	by disco descending antal;
run;

data temp.crosswalk_disco_20092010 (keep=disco disco88);
 	set crosswalk;
  	by disco;
  	if first.disco; * select most frequent;
  	rename disco2009=disco88; 
run;

*4d;
proc means data=crosswalk_population_4d n noprint;
 	class disco_4d disco2009_4d;
 	var antal;
 	output out=crosswalk_4d n=antal;
run;


proc sort data=crosswalk_4d (where=(disco_4d ne . and disco2009_4d ne .)); *proc means creates missings;
 	by disco_4d descending antal;
run;

data temp.crosswalk_disco_20092010_4d (keep=disco_4d disco88_4d);
 	set crosswalk_4d;
  	by disco_4d;
  	if first.disco_4d; * select most frequent;
  	rename disco2009_4d=disco88_4d; 
run;

*	c)	;
data idanakm1;														
	merge 	temp.idanupd
			temp.akmupd;
	by pnr year;
run;

data idanakm2;
	merge 	idanakm1 
			combined2009;
	by pnr;
run;
proc sort data=idanakm2; by disco; run;

data idanakm3;
	merge 	idanakm2 (in=a)
			temp.crosswalk_disco_20092010 (in=b); *gets us disco88;
	by disco;
	if a;
	disco_4d=int(disco/100);
run;
proc sort data=idanakm3; by disco_4d; run;
data idanakm4;
	merge 	idanakm3 (in=a)
			temp.crosswalk_disco_20092010_4d (in=b); * gets us disco88_4d;
	by disco_4d;
	if a;
run;	
	

proc sort data=idanakm4; by pnr year; run;


data out.idanakm (drop = lbnr2009 disco2009: rename=(disco88_4d=isco88 disco_4d=isco));
	set idanakm4;*data with crosswalk;
	*do not apply crosswalk in the following cases:;
	if year lt 2010 then do; 	disco88=disco; disco88_4d = disco_4d; 	end; 
					else do;	if (lbnr=lbnr2009 & lbnr not in (.,0) & disco2009 ne .) then disco88=disco2009; 
								if (lbnr=lbnr2009 & lbnr not in (.,0) & disco2009 ne .) then disco88_4d=disco2009_4d; 
								end;
	label 	disco88		=	"Disco-88 (6 digit)"
			disco88_4d	=	"Isco-88 (4 digit)"
			disco		=	"Disco88/disco08 (6 digit) with databreak 2009-2010"
			disco_4d	=	"Isco88/Isco08 (4 digit) with databreak 2009-2010";
run;


proc datasets library=work kill; 
run;quit;

proc datasets library=temp kill; 
run;quit;












